Mediation is tested through three regression models:
Four conditions of mediation:
How much of a reduction in the relationship between the predictor and outcome is necessary to infer mediation?
People tend to look for a change in significance, which can lead to the ‘all or nothing’ thinking that p-values encourage.
Two solutions:
What is bootstrapping?
Mediation models usually occur in stages - meaning separate models are run with different IV / DV combinations.
Sometimes people call these “steps”, but do not confuse this with hierarchical regression.
So, what does that do for data screening?
library(rio)
master <- import("data/mediation.sav")
str(master)
#> 'data.frame': 240 obs. of 3 variables:
#> $ previous: num 3 2 1 1 1 1 1 1 1 1 ...
#> ..- attr(*, "format.spss")= chr "F1.0"
#> ..- attr(*, "display_width")= int 14
#> $ facebook: num 3.67 5 4 4.5 4.5 ...
#> ..- attr(*, "format.spss")= chr "F8.2"
#> ..- attr(*, "display_width")= int 14
#> $ exam : num 1 1 2 7 6 1 2 1 1 1 ...
#> ..- attr(*, "format.spss")= chr "F8.2"
#> ..- attr(*, "display_width")= int 10
summary(master)
#> previous facebook exam
#> Min. :1.000 Min. :1.750 Min. : 1.000
#> 1st Qu.:1.000 1st Qu.:3.750 1st Qu.: 1.000
#> Median :1.000 Median :4.250 Median : 1.000
#> Mean :1.571 Mean :4.143 Mean : 1.825
#> 3rd Qu.:2.000 3rd Qu.:4.750 3rd Qu.: 2.000
#> Max. :7.000 Max. :5.000 Max. :11.000
#> NA's :1
master <- na.omit(master)model1 <- lm(exam ~ previous, data = master)
summary(model1)
#>
#> Call:
#> lm(formula = exam ~ previous, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.9092 -0.6477 -0.6477 0.3523 8.4062
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 1.33228 0.16749 7.955
#> previous 0.31539 0.08689 3.630
#> Pr(>|t|)
#> (Intercept) 0.0000000000000737 ***
#> previous 0.000348 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.496 on 237 degrees of freedom
#> Multiple R-squared: 0.05266, Adjusted R-squared: 0.04867
#> F-statistic: 13.17 on 1 and 237 DF, p-value: 0.0003476model2 <- lm(facebook ~ previous, data = master)
summary(model2)
#>
#> Call:
#> lm(formula = facebook ~ previous, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -2.44609 -0.44609 0.05391 0.55391 1.35640
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 4.28817 0.08195 52.330
#> previous -0.09208 0.04251 -2.166
#> Pr(>|t|)
#> (Intercept) <0.0000000000000002 ***
#> previous 0.0313 *
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.732 on 237 degrees of freedom
#> Multiple R-squared: 0.01941, Adjusted R-squared: 0.01527
#> F-statistic: 4.692 on 1 and 237 DF, p-value: 0.03131model3 <- lm(exam ~ previous + facebook, data = master)
summary(model3)
#>
#> Call:
#> lm(formula = exam ~ previous + facebook, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -2.6065 -0.7666 -0.3117 0.3850 7.9999
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 3.93294 0.56791 6.925 0.0000000000409
#> previous 0.25954 0.08397 3.091 0.00224
#> facebook -0.60647 0.12705 -4.773 0.0000031782120
#>
#> (Intercept) ***
#> previous **
#> facebook ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.432 on 236 degrees of freedom
#> Multiple R-squared: 0.1361, Adjusted R-squared: 0.1288
#> F-statistic: 18.59 on 2 and 236 DF, p-value: 0.00000003194The Aroian Test: \(Z = \frac{a \times b}{\sqrt{b^2 \times SD_a^2 + a^2 \times SD_b^2 + SD_a^2 \times SD_b^2}}\)
If the indirect effect is larger than the error, we would conclude that the addition of the M variable changed the c path.
#aroian sobel
a <- coef(model2)[2]
b <- coef(model3)[3]
SEa <- summary(model2)$coefficients[2,2]
SEb <- summary(model3)$coefficients[3,2]
zscore <- (a*b)/(sqrt((b^2*SEa^2)+(a^2*SEb^2)+(SEa^2*SEb^2)))
zscore
#> previous
#> 1.937494
#two tailed test
pnorm(abs(zscore), lower.tail = F)*2
#> previous
#> 0.05268497#devtools::install_github("doomlab/MeMoBootR")
library(MeMoBootR)
#> Loading required package: diagram
#> Loading required package: shape
#>
#> Attaching package: 'shape'
#> The following object is masked from 'package:corrplot':
#>
#> colorlegend
#no missing data allowed
med_results <- mediation1(y = "exam",
x = "previous",
m = "facebook",
df = master)head(med_results$datascreening$fulldata)
#> previous facebook exam badmahal badleverage badcooks
#> 1 3 3.666667 1 0 0 0
#> 2 2 5.000000 1 0 0 0
#> 3 1 4.000000 2 0 0 0
#> 4 1 4.500000 7 0 0 1
#> 5 1 4.500000 6 0 0 1
#> 6 1 4.500000 1 0 0 0
#> totalout
#> 1 0
#> 2 0
#> 3 0
#> 4 1
#> 5 1
#> 6 0
med_results$datascreening$correl
#> (Intercept) previous facebook
#> (Intercept) 1.0000000 -0.3617623 -0.9593469
#> previous -0.3617623 1.0000000 0.1393247
#> facebook -0.9593469 0.1393247 1.0000000
med_results$datascreening$linearitysummary(med_results$model1)
#>
#> Call:
#> lm(formula = allformulas$eq1, data = finaldata)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.9092 -0.6477 -0.6477 0.3523 8.4062
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 1.33228 0.16749 7.955
#> previous 0.31539 0.08689 3.630
#> Pr(>|t|)
#> (Intercept) 0.0000000000000737 ***
#> previous 0.000348 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.496 on 237 degrees of freedom
#> Multiple R-squared: 0.05266, Adjusted R-squared: 0.04867
#> F-statistic: 13.17 on 1 and 237 DF, p-value: 0.0003476
summary(med_results$model2)
#>
#> Call:
#> lm(formula = allformulas$eq2, data = finaldata)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -2.44609 -0.44609 0.05391 0.55391 1.35640
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 4.28817 0.08195 52.330
#> previous -0.09208 0.04251 -2.166
#> Pr(>|t|)
#> (Intercept) <0.0000000000000002 ***
#> previous 0.0313 *
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.732 on 237 degrees of freedom
#> Multiple R-squared: 0.01941, Adjusted R-squared: 0.01527
#> F-statistic: 4.692 on 1 and 237 DF, p-value: 0.03131
summary(med_results$model3)
#>
#> Call:
#> lm(formula = allformulas$eq3, data = finaldata)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -2.6065 -0.7666 -0.3117 0.3850 7.9999
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 3.93294 0.56791 6.925 0.0000000000409
#> previous 0.25954 0.08397 3.091 0.00224
#> facebook -0.60647 0.12705 -4.773 0.0000031782120
#>
#> (Intercept) ***
#> previous **
#> facebook ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.432 on 236 degrees of freedom
#> Multiple R-squared: 0.1361, Adjusted R-squared: 0.1288
#> F-statistic: 18.59 on 2 and 236 DF, p-value: 0.00000003194Last, let’s get the bootstrapped results:
med_results$boot.results
#>
#> ORDINARY NONPARAMETRIC BOOTSTRAP
#>
#>
#> Call:
#> boot(data = finaldata, statistic = indirectmed, R = nboot, formula2 = allformulas$eq2,
#> formula3 = allformulas$eq3, x = x, med.var = m)
#>
#>
#> Bootstrap Statistics :
#> original bias std. error
#> t1* 0.05584541 0.002617188 0.03151453
med_results$boot.ci
#> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = bootresults, conf = conf_level, type = "norm")
#>
#> Intervals :
#> Level Normal
#> 95% (-0.0085, 0.1150 )
#> Calculations and Intervals on Original Scale
med_results$diagramThe combined effect of two variables on another is known conceptually as moderation, and in statistical terms as an interaction effect.
Do violent video games make people aggressive?
DV: Aggression
IV:
The interaction term makes the bs for the main predictors uninterpretable in many situations.
Centering refers to the process of transforming a variable into deviations around a fixed point.
Centering solves two problems:
Easiest way to center to make them useful:
Centered variables for main effects have two interpretations
For continuous variables, you “create” low, average, and high groups.
master <- import("data/moderation.sav")
str(master)
#> 'data.frame': 442 obs. of 4 variables:
#> $ ID : num 69 55 7 96 130 124 72 139 102 179 ...
#> ..- attr(*, "label")= chr "Case ID"
#> ..- attr(*, "format.spss")= chr "F3.0"
#> $ Aggression: num 13 38 30 23 25 46 41 22 35 23 ...
#> ..- attr(*, "label")= chr "Agression"
#> ..- attr(*, "format.spss")= chr "F2.0"
#> $ Vid_Games : num 16 12 32 10 11 29 23 15 20 20 ...
#> ..- attr(*, "label")= chr "Video Games (Hours per week)"
#> ..- attr(*, "format.spss")= chr "F2.0"
#> $ CaUnTs : num 0 0 0 1 1 1 2 3 3 3 ...
#> ..- attr(*, "label")= chr "Callous Unemotional Traits"
#> ..- attr(*, "format.spss")= chr "F2.0"Center your variables using scale()
For data screening, we would screen the model with the interaction, and all other considerations are the same as regression.
Run the model:
summary(modmodel)
#>
#> Call:
#> lm(formula = Aggression ~ zvid * zcal, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.7144 -6.9087 -0.1923 6.9036 29.2290
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 39.967108 0.475057 84.131
#> zvid 0.169625 0.068473 2.477
#> zcal 0.760093 0.049458 15.368
#> zvid:zcal 0.027062 0.006981 3.877
#> Pr(>|t|)
#> (Intercept) < 0.0000000000000002 ***
#> zvid 0.013616 *
#> zcal < 0.0000000000000002 ***
#> zvid:zcal 0.000122 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 9.976 on 438 degrees of freedom
#> Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
#> F-statistic: 88.46 on 3 and 438 DF, p-value: < 0.00000000000000022So, how do we create groups when we have a continuous variable?
Create the low and high versions of the M variable (callousness in this example)
#create the low and high z score variables
master$zcallow <- master$zcal + sd(master$zcal) #bring them up
master$zcalhigh <- master$zcal - sd(master$zcal) #bring them down
summary(master)
#> ID Aggression Vid_Games
#> Min. : 1.0 Min. : 9.00 Min. : 1.00
#> 1st Qu.:111.2 1st Qu.:32.00 1st Qu.:17.00
#> Median :221.5 Median :40.00 Median :22.00
#> Mean :221.5 Mean :40.05 Mean :21.84
#> 3rd Qu.:331.8 3rd Qu.:48.00 3rd Qu.:26.00
#> Max. :442.0 Max. :82.00 Max. :38.00
#> CaUnTs zvid.V1
#> Min. : 0.0 Min. :-20.843891
#> 1st Qu.:11.0 1st Qu.: -4.843891
#> Median :18.0 Median : 0.156109
#> Mean :18.6 Mean : 0.000000
#> 3rd Qu.:26.0 3rd Qu.: 4.156109
#> Max. :43.0 Max. : 16.156109
#> zcal.V1 zcallow.V1
#> Min. :-18.595023 Min. :-8.97733
#> 1st Qu.: -7.595023 1st Qu.: 2.02267
#> Median : -0.595023 Median : 9.02267
#> Mean : 0.000000 Mean : 9.61769
#> 3rd Qu.: 7.404977 3rd Qu.:17.02267
#> Max. : 24.404977 Max. :34.02267
#> zcalhigh.V1
#> Min. :-28.212716
#> 1st Qu.:-17.212716
#> Median :-10.212716
#> Mean : -9.617693
#> 3rd Qu.: -2.212716
#> Max. : 14.787284summary(modmodellow) #low slope
#>
#> Call:
#> lm(formula = Aggression ~ zvid * zcallow, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.7144 -6.9087 -0.1923 6.9036 29.2290
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 32.656771 0.672018 48.595
#> zvid -0.090651 0.099105 -0.915
#> zcallow 0.760093 0.049458 15.368
#> zvid:zcallow 0.027062 0.006981 3.877
#> Pr(>|t|)
#> (Intercept) < 0.0000000000000002 ***
#> zvid 0.360854
#> zcallow < 0.0000000000000002 ***
#> zvid:zcallow 0.000122 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 9.976 on 438 degrees of freedom
#> Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
#> F-statistic: 88.46 on 3 and 438 DF, p-value: < 0.00000000000000022summary(modmodel) #average slope
#>
#> Call:
#> lm(formula = Aggression ~ zvid * zcal, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.7144 -6.9087 -0.1923 6.9036 29.2290
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 39.967108 0.475057 84.131
#> zvid 0.169625 0.068473 2.477
#> zcal 0.760093 0.049458 15.368
#> zvid:zcal 0.027062 0.006981 3.877
#> Pr(>|t|)
#> (Intercept) < 0.0000000000000002 ***
#> zvid 0.013616 *
#> zcal < 0.0000000000000002 ***
#> zvid:zcal 0.000122 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 9.976 on 438 degrees of freedom
#> Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
#> F-statistic: 88.46 on 3 and 438 DF, p-value: < 0.00000000000000022summary(modmodelhigh) #high slope
#>
#> Call:
#> lm(formula = Aggression ~ zvid * zcalhigh, data = master)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.7144 -6.9087 -0.1923 6.9036 29.2290
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 47.277446 0.672520 70.299
#> zvid 0.429901 0.092577 4.644
#> zcalhigh 0.760093 0.049458 15.368
#> zvid:zcalhigh 0.027062 0.006981 3.877
#> Pr(>|t|)
#> (Intercept) < 0.0000000000000002 ***
#> zvid 0.00000453 ***
#> zcalhigh < 0.0000000000000002 ***
#> zvid:zcalhigh 0.000122 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 9.976 on 438 degrees of freedom
#> Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
#> F-statistic: 88.46 on 3 and 438 DF, p-value: < 0.00000000000000022library(ggplot2)
cleanup <- theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line.x = element_line(color = "black"),
axis.line.y = element_line(color = "black"),
legend.key = element_rect(fill = "white"),
text = element_text(size = 15))
modgraph <- ggplot(master, aes(zvid, Aggression))
##change Cal to the new moderator label
##change xlab for the new X label
modgraph +
xlab("Centered Video Games") +
geom_point(color = "gray") +
geom_abline(aes(intercept = modmodellow$coefficients[1],
slope = modmodellow$coefficients[2],
linetype = "-1SD Cal"), size = 1) +
geom_abline(aes(intercept = modmodel$coefficients[1],
slope = modmodel$coefficients[2],
linetype = "Average Cal"), size = 1) +
geom_abline(aes(intercept = modmodelhigh$coefficients[1],
slope = modmodelhigh$coefficients[2],
linetype = "+1SD Cal"), size = 1) +
scale_linetype_manual(values = c("dotted", "dashed", "solid"),
breaks = c("-1SD Cal", "Average Cal", "+1SD Cal"),
name = "Simple Slope") +
cleanup MeMoBootR to complete the entire processing, including data screening for us!#data screening
head(mod_model$datascreening$fulldata)
#> ID Aggression Vid_Games CaUnTs zvid zcal
#> 1 69 13 16 0 -5.843891 -18.59502
#> 2 55 38 12 0 -9.843891 -18.59502
#> 3 7 30 32 0 10.156109 -18.59502
#> 4 96 23 10 1 -11.843891 -17.59502
#> 5 130 25 11 1 -10.843891 -17.59502
#> 6 124 46 29 1 7.156109 -17.59502
#> zcallow zcalhigh badmahal badleverage badcooks
#> 1 -8.97733 -28.21272 0 0 1
#> 2 -8.97733 -28.21272 0 1 0
#> 3 -8.97733 -28.21272 0 1 0
#> 4 -7.97733 -27.21272 0 1 0
#> 5 -7.97733 -27.21272 0 1 0
#> 6 -7.97733 -27.21272 0 1 1
#> totalout
#> 1 1
#> 2 1
#> 3 1
#> 4 1
#> 5 1
#> 6 2
#mod_model$datascreening$correl
#mod_model$datascreening$linearity
#mod_model$datascreening$normality
#mod_model$datascreening$homogen
#models
summary(mod_model$model1)
#>
#> Call:
#> lm(formula = allformulas$eq1, data = finaldata)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -29.7144 -6.9087 -0.1923 6.9036 29.2290
#>
#> Coefficients:
#> Estimate Std. Error t value
#> (Intercept) 39.967108 0.475057 84.131
#> Vid_Games 0.169625 0.068473 2.477
#> CaUnTs 0.760093 0.049458 15.368
#> Vid_Games:CaUnTs 0.027062 0.006981 3.877
#> Pr(>|t|)
#> (Intercept) < 0.0000000000000002 ***
#> Vid_Games 0.013616 *
#> CaUnTs < 0.0000000000000002 ***
#> Vid_Games:CaUnTs 0.000122 ***
#> ---
#> Signif. codes:
#> 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 9.976 on 438 degrees of freedom
#> Multiple R-squared: 0.3773, Adjusted R-squared: 0.373
#> F-statistic: 88.46 on 3 and 438 DF, p-value: < 0.00000000000000022
#summary(mod_model$model1low)
#summary(mod_model$model1high)
mod_model$interpretation
#> [1] "At low levels of CaUnTs, you see that every unit increase in Vid_Games predicts -0.09 unit change in Aggression. \n\nAt average levels of CaUnTs, you see that every unit increase in Vid_Games predicts 0.17 unit change in Aggression. \n\nAt high levels of CaUnTs, you see that every unit increase in Vid_Games predicts 0.43 unit change in Aggression."
#graphs
mod_model$graphslopesIn this lecture, we have covered: